/*==============================================================================
This .do file brings in UK Migration data.

There are four components of NUTS 2 migration: 

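1. Inmigration from another NUTS 1
	This is taken from Table 1a: the sum of the rows whose origin lies 
	outside the NUTS 1 region, i.e. Origin: "GREAT BRITAIN (excluding 
	region)" (and "England and Wales" where the table reports that origin),
	so essentially you are starting from outside the NUTS 1.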
2. Outmigration to another NUTS 1
	This is given in Table 2a, which gives total outmigration from each 
	region within the NUTS 1, to other regions of GB outside the NUTS 1 
	region they started in.
	
3. Outmigration to another NUTS 2 within the same NUTS 1
4. Inmigration from another NUTS 2 within the same NUTS 1
	These latter two types of migration are calculated using the migration 
	matrices for within NUTS 1 migration, given by Table 1a.
	The matrix is converted into a vector with every possible starting and 
	ending combination.
	If the same NUTS 2 is both the starting and ending destination, this 
	row is dropped.
	Collapse (sum) migration, by(nuts_start) is outmigration from the NUTS 
	region
	Collapse (sum) migration, by(nuts_end) will be inmigration in the NUTS 
	region

In the code below, (1), (3) and (4) above are calculated in Part I, using Table 
1a. (2) is calculated in Part II, using Table 2a. 
	
NOTE: This is a 10% sample, so we multiply migration by 10.

With the exception of Northern Ireland, the source is the 1971 Census County 
Regional reports. 

==============================================================================*/

* ------------------------------------------------------------------------------
* Setup
* ------------------------------------------------------------------------------
* All region tables are read relative to this folder.
cd "$insheet_files/Migration/UK Migration Matrices"
clear
set more off

* NUTS 1 regions of Great Britain. Each entry is the stem of the two input
* files for that region: "REGION 1a.csv" and "REGION 2a.csv".
local regions_of_uk East_Anglia Yorkshire_Humberside West_Midlands Wales South_East South_West East_Midlands North North_West Scotland

* Origin labels in Table 1a that denote flows starting outside the NUTS 1.
local outside_gb "GREAT BRITAIN (excluding region)"
local outside_ew "England and Wales"

* ------------------------------------------------------------------------------
* I. Table 1a: inmigration from another NUTS 1, plus inmigration and
*    outmigration between NUTS 2 regions inside the same NUTS 1
* ------------------------------------------------------------------------------
foreach region of local regions_of_uk {
	quietly {

		* (1) Inmigration from another NUTS 1: keep only the origin rows
		* that lie outside the region and total them by destination NUTS 2.
		insheet using "`region' 1a.csv", names case clear
		keep if inlist(nuts_start_name, "`outside_gb'", "`outside_ew'")
		reshape long UK, i(nuts_start) j(nuts) string
		rename UK inmigration_outside_nuts1
		* Column names are UK + a code; only the first two characters of
		* the code are kept, giving a four-character NUTS identifier.
		replace nuts = "UK" + substr(nuts, 1, 2)
		collapse (sum) inmigration_outside_nuts1, by(nuts)

		tempfile from_outside
		save "`from_outside'"

		* (3) and (4) Flows between NUTS 2 regions of the same NUTS 1.
		* What remains after dropping the outside origins is the
		* origin-by-destination matrix for the inside of the NUTS 1.
		insheet using "`region' 1a.csv", names case clear
		drop if inlist(nuts_start_name, "`outside_gb'", "`outside_ew'")
		drop nuts_start_name

		if _N == 0 {
			* No within-region matrix in this table: the region result
			* consists of the outside inmigration only.
			use "`from_outside'", clear
		}
		else {
			* Turn the matrix into one row per origin/destination pair.
			* A row counter is used as the reshape key so that the result
			* does not depend on nuts_start being unique across rows.
			gen long origin_row = _n
			reshape long UK, i(origin_row) j(nuts_end) string
			drop origin_row
			replace nuts_end = "UK" + substr(nuts_end, 1, 2)
			rename UK migration

			* Moves that start and end in the same NUTS 2 are not migration
			* between regions.
			drop if nuts_start == nuts_end

			tempfile flow_pairs
			save "`flow_pairs'"

			* Outmigration: total by origin. A region with a single NUTS 2
			* has no pairs left at this point, and collapse cannot run on
			* an empty dataset, so that case just keeps the empty columns.
			if _N > 0 {
				collapse (sum) migration, by(nuts_start)
			}
			else {
				keep nuts_start migration
			}
			rename migration outmigration_within_nuts1
			rename nuts_start nuts

			tempfile out_within
			save "`out_within'"

			* Inmigration: total by destination, same empty-data handling.
			use "`flow_pairs'", clear
			if _N > 0 {
				collapse (sum) migration, by(nuts_end)
			}
			else {
				keep nuts_end migration
			}
			rename migration inmigration_within_nuts1
			rename nuts_end nuts

			tempfile in_within
			save "`in_within'"

			* One row per NUTS 2 with the three Table 1a components.
			use "`from_outside'", clear
			merge 1:1 nuts using "`out_within'", nogenerate
			merge 1:1 nuts using "`in_within'", nogenerate
		}

		tempfile `region'_in_out
		save "``region'_in_out'"
	}
}

* Stack the regions. Starting from an empty dataset means no region has to be
* loaded by name first, so the list above can be edited freely.
clear
foreach region of local regions_of_uk {
	append using "``region'_in_out'"
}
* Kept from the original workflow: removes rows that are identical in every
* variable.
duplicates drop

tempfile all_region_in_out
save "`all_region_in_out'"

* ------------------------------------------------------------------------------
* II. Table 2a: outmigration to regions outside of the NUTS 1
* ------------------------------------------------------------------------------
foreach region of local regions_of_uk {
	insheet using "`region' 2a.csv", clear

	* Row total over every column whose name starts with out; missing
	* cells count as zero.
	egen outmigration_outside_NUTS1 = rowtotal(out*)
	collapse (sum) outmigration_outside_NUTS1, by(nuts_start)
	rename nuts_start nuts

	tempfile `region'_out_nuts1
	save "``region'_out_nuts1'"
}

clear
foreach region of local regions_of_uk {
	append using "``region'_out_nuts1'"
}
duplicates drop

* ------------------------------------------------------------------------------
* Combine all four components
* ------------------------------------------------------------------------------
merge 1:1 nuts using "`all_region_in_out'"

* Totals across the component columns: inmigration_outside_nuts1 plus
* inmigration_within_nuts1, and outmigration_outside_NUTS1 plus
* outmigration_within_nuts1. Missing components count as zero.
egen inmigration 	= rowtotal(inmigration*)
egen outmigration 	= rowtotal(outmigration*)

* 10 % Sample. Multiply by 10.
replace inmigration 	= inmigration*10
replace outmigration 	= outmigration*10

********************************************************************************
*Add data for Northern Ireland
* Two rows are appended, UKN0 and UKN, carrying the same figures. They are
* entered after the scaling step above, so they are not multiplied by 10.
* The new rows are addressed by position, so no existing row is touched.
local first_ni_row = _N + 1
set obs `=_N + 2'

replace nuts = "UKN0" in `first_ni_row'
replace nuts = "UKN" in l
replace outmigration = 14860 in `first_ni_row'/l
replace inmigration = 10060 in `first_ni_row'/l

********************************************************************************

keep nuts inmigration outmigration 

save "$dta_files/uk_migration.dta", replace
